Public attitudes toward multilateral climate action

Author

Kyuin LEE

R Script

For a cross-country comparison of climate-related attitudes, I combined 2 sets of data from the Pew Research Center:

  1. Global Attitudes Survey: a global survey across 15 countries, excluding the U.S.
  2. American Trends Panel: a national survey of U.S. adults

However, the focus of this analysis was limited to the following list of countries, for which climate data were available.

The 6 countries were chosen to be geographically representative:

  • Countries among the world’s 10 biggest polluters:
    • U.S.
    • Germany
    • South Korea
  • Countries at the forefront of taking climate action:
    • Australia
    • Sweden
    • The Netherlands

I. Getting started

### 1) Print numbers in full rather than in scientific notation
options(scipen = 999)


### 2) Install (when missing) and attach the required packages
# The attach order is kept as-is because later packages mask earlier ones.
pacman::p_load(
  tidyverse,
  # Working with labelled .sav data
  haven, labelled, sjlabelled,
  # Generating regression outputs
  stargazer, ggeffects, dotwhisker,
  # Generating tables
  gt, gtExtras
)


### 3) Load the raw survey files

# Global Attitudes Survey (all countries except the U.S.)
Global.df <- read_sav(
  "./Dataset/Global Attitudes Survey (Spring 2021).sav"
)

# American Trends Panel (U.S. adults)
US.df <- read_sav(
  "./Dataset/American Trends Panel (Wave 82).sav"
)


### 4) (For reference) Build a data dictionary per survey
# Each dictionary is a look-up table of the original response categories.
dictionary_global <- generate_dictionary(Global.df)
dictionary_US <- generate_dictionary(US.df)

# Retrieve the look-up table with:
# dictionary_US$value_labels or dictionary_global$value_labels

II. Data wrangling

a. The Global Attitudes Survey

1) Clean column names & filter observations by country

# Strip 1) the leading 'D_' and 2) the trailing '_2017' from every column name
names(Global.df) <- str_remove_all(names(Global.df), "^D_|_2017$")



Global.df <-
  Global.df %>%
  # Give the two Australian education items descriptive, non-clashing names
  rename(
    EDUC_AUSTRALIA_SUPPLEMENTARY = EDUC_AUSTRALIA,
    EDUC_AUSTRALIA = EDUC_AUSTRALIA_2017B
  ) %>%
  # Keep only the national samples coded 1, 5, 9, 12 and 14
  # (Australia, Germany, Netherlands, South Korea, and Sweden)
  filter(public %in% c(1, 5, 9, 12, 14)) %>%
  # Store each respondent's nationality as a string by looking up the value
  # label of `public`.
  # NOTE(review): the lookup indexes the label vector by position, so it
  # assumes the codes of `public` run 1, 2, 3, ... with one label each --
  # confirm against the codebook.
  mutate(COUNTRY = get_labels(public)[public])

2) Standardize the education variable across countries

# Helper that recodes the education items of
# 1) the Dutch, 2) the South Korean, and 3) the Swedish samples.
#
# x: vector of raw education codes.
# Returns a vector of the same length where
#   98 / 99 ('Don't know' / 'Refused')  -> NA
#   8 / 9 / 10 ('BA degree or higher')  -> 1
#   any other non-missing code          -> 0 ('No BA degree')
#   NA (respondent is not part of the focal country's sample) -> NA

clean_response1 <- function(x) {
  is_nonresponse <- x %in% c(98, 99)
  has_ba_or_higher <- x %in% c(8, 9, 10)
  # 0 for every answered item, NA where the item was never asked
  no_ba_or_not_asked <- ifelse(is.na(x), NA, 0)

  ifelse(
    is_nonresponse,
    NA,
    ifelse(has_ba_or_higher, 1, no_ba_or_not_asked)
  )
}




# Recode each country's education item to 1 = 'BA degree or higher',
# 0 = 'No BA degree', NA = missing, then merge them into one EDUCATION column.
Global_edu_cleaned <-
  Global.df %>%
  mutate(
    # Australia, fallback value taken from the supplementary schooling item:
    # NA when that item is 'Don't know' / 'Refused' (8, 9) or when the
    # respondent is not Australian; 0 for every other Australian respondent.
    EDUC_AUSTRALIA_TEMPORARY = case_when(
      EDUC_AUSTRALIA_SUPPLEMENTARY %in% c(8, 9) ~ NA_real_,
      COUNTRY == "Australia" ~ 0,
      TRUE ~ NA_real_
    ),
    # Australia, final value: use the 'highest degree obtained' item when it
    # is present, otherwise fall back on the temporary column above
    # (which can only contribute NA or 0, i.e. 'Without a BA degree').
    EDUC_AUSTRALIA_NEW = case_when(
      is.na(EDUC_AUSTRALIA) ~ EDUC_AUSTRALIA_TEMPORARY,
      EDUC_AUSTRALIA %in% c(98, 99) ~ NA_real_, # 'Don't know' / 'Refused'
      EDUC_AUSTRALIA %in% c(5, 6, 7) ~ 1,
      TRUE ~ 0
    ),
    # Germany: 8 / 9 are 'Don't know' / 'Refused', 5-7 are 'BA or higher'.
    # Remaining answered rows become 0; rows with no value (non-German
    # samples) stay NA.
    EDUC_GERMANY_NEW = case_when(
      EDUC_GERMANY %in% c(8, 9) ~ NA_real_,
      EDUC_GERMANY %in% c(5, 6, 7) ~ 1,
      !is.na(EDUC_GERMANY) ~ 0,
      TRUE ~ NA_real_
    ),
    # Netherlands, South Korea and Sweden share one coding scheme, so the
    # helper is applied to each and the result stored under '<column>_NEW'.
    across(
      c(EDUC_NETHERLANDS, EDUC_SKOREA, EDUC_SWEDEN),
      clean_response1,
      .names = "{col}_NEW"
    )
  ) %>%
  # Pick, for every respondent, the recoded column of their own country.
  # Any non-missing country other than the first four uses the Swedish column.
  mutate(
    EDUCATION = case_when(
      COUNTRY == "Australia" ~ EDUC_AUSTRALIA_NEW,
      COUNTRY == "Germany" ~ EDUC_GERMANY_NEW,
      COUNTRY == "Netherlands" ~ EDUC_NETHERLANDS_NEW,
      COUNTRY == "South Korea" ~ EDUC_SKOREA_NEW,
      !is.na(COUNTRY) ~ EDUC_SWEDEN_NEW
    )
  )

3) Standardize the income variable across countries

# Income items: turn 'Don't know' (98) and 'Refused' (99) into NA so that the
# median income category can later be computed with the NAs removed.
#
# x: vector of raw income codes.
# Returns x with 98 / 99 replaced by NA and every other value kept as-is.
# `ifelse()` is used on purpose: the result is a plain vector without the
# label attributes of the input.
clean_response2 <- function(x) {
  is_nonresponse <- x %in% c(98, 99)
  ifelse(is_nonresponse, NA, x)
}


# Clean the income item of all 5 countries, then merge them into one column
Global_edu_income_cleaning <-
  Global_edu_cleaned %>%
  mutate(
    # Recode non-response to NA in every 'INCOME_<country>' column
    across(
      starts_with("INCOME_") &
        ends_with(c("AUSTRALIA", "GERMANY", "NETHERLANDS", "SKOREA", "SWEDEN")),
      clean_response2
    ),
    # For each respondent, take the income item of their own country.
    # Any non-missing country other than the first four uses the Swedish item.
    INCOME_UNSTANDARDIZED = case_when(
      COUNTRY == "Australia" ~ INCOME_AUSTRALIA,
      COUNTRY == "Germany" ~ INCOME_GERMANY,
      COUNTRY == "Netherlands" ~ INCOME_NETHERLANDS,
      COUNTRY == "South Korea" ~ INCOME_SKOREA,
      !is.na(COUNTRY) ~ INCOME_SWEDEN
    )
  )


# Median (unstandardized) income category within each national sample,
# ignoring respondents whose income is missing
Global_income_median_UNSTANDARDIZED <-
  summarize(
    group_by(Global_edu_income_cleaning, COUNTRY),
    median_income = median(INCOME_UNSTANDARDIZED, na.rm = TRUE)
  )



# Render the per-country medians as a formatted table
gt(Global_income_median_UNSTANDARDIZED) %>%
  tab_header(
    title = "The median income category of survey respondents",
    subtitle = "By country:"
  ) %>%
  tab_source_note(
    source_note = md("**<ins>Note</ins>**: The income categories presented above are *unstandardized*.")
  ) %>%
  # Human-readable column headers
  cols_label(
    COUNTRY = "Country/Nationality",
    median_income = "Median income category"
  ) %>%
  # Left-align every column
  cols_align(align = "left") %>%
  gt_theme_538()
The median income category of survey respondents
By country:
Country/Nationality Median income category
Australia 6
Germany 6
Netherlands 5
South Korea 8
Sweden 8
Note: The income categories presented above are unstandardized.
# Lastly, standardize the income variable so that:
# values above or equal to the *country* median (i.e. High income) = 1 and
# values below the *country* median (i.e. Low income) = 0
#
# The median is computed inside each national sample instead of being typed in
# by hand, so the cut-offs cannot drift out of sync with the data
# (e.g. after a different survey wave or country filter is used).
# Respondents with a missing income stay NA.

Global_edu_income_cleaned <-
  Global_edu_income_cleaning %>%
  group_by(COUNTRY) %>%
  mutate(
    INCOME = as.numeric(
      INCOME_UNSTANDARDIZED >= median(INCOME_UNSTANDARDIZED, na.rm = TRUE)
    )
  ) %>%
  # Drop the grouping so later steps operate on the whole data set again
  ungroup()

4) Standardize the responses across the Global and the US data

  • To perform a logistic regression, convert the dependent variable (CLIMATE_INTLCOMMUNITY) into a binary variable. Rename it as CLIMATE_CONFIDENCE
  • Collapse the scale responses and convert the independent variable (DIVERSITY_GOODBAD) into a binary variable. Rename it as DIVERSITY_GOOD
  • Code sex/gender as 1 = female, 0 = male/non-female, instead of 1s and 2s
  • Collapse the scale responses and recode political ideology as 1 = left, 2 = moderate, 3 = right
  • Classify the respondents’ age (straight age) into age categories
# Helper for the 1) satisfaction-with-democracy and 2) climate-concern items:
# turns 'Don't know' (8) and 'Refused' (9) into NA.
#
# x: vector of raw response codes.
# Returns x with 8 / 9 replaced by NA and every other value kept as-is,
# as a plain vector (`ifelse()` does not carry over the input's attributes).

clean_response3 <- function(x) {
  is_nonresponse <- x %in% c(8, 9)
  ifelse(is_nonresponse, NA, x)
}



# Clean the entire dataset:
# recode the dependent, independent and control variables, keep only the
# columns used in the analysis, and export the result.

Global.df_FINAL <- Global_edu_income_cleaned %>%
  mutate(
    # Dependent variable:
    # 'very confident' or 'somewhat confident' (1, 2) = 1,
    # 'not too confident' or 'not at all confident' (3, 4) = 0,
    # anything else = NA
    CLIMATE_CONFIDENCE = ifelse(CLIMATE_INTLCOMMUNITY %in% c(1, 2), 1,
                                ifelse(CLIMATE_INTLCOMMUNITY %in% c(3, 4), 0, NA)),
    # 'diversity makes a better place to live' (1) = 1;
    # 'worse place to live' or 'doesn't make much difference' = 0;
    # 'Don't know' / 'Refused' (8, 9) = NA
    DIVERSITY_GOOD = ifelse(DIVERSITY_GOODBAD %in% c(8, 9), NA,
                            ifelse(DIVERSITY_GOODBAD == 1, 1, 0)),
    # Recode 'Don't know' / 'Refused' to NA, in place
    across(c(SATISFIED_DEMOCRACY, CLIMATE_CONCERN), ~ clean_response3(.x)),
    # Dummy variable for sex/gender (code 2 = female)
    # NOTE(review): any non-response code of SEX would be counted as 0 here --
    # confirm against the codebook that SEX has no such codes.
    FEMALE = ifelse(SEX == 2, 1, 0),
    # Age brackets: 18-29, 30-49, 50-64, 65+; ages below 18 become NA
    # NOTE(review): only 99 is treated as missing. If the survey also uses 98
    # for 'Don't know', those rows would land in the 65+ bracket -- confirm.
    AGE_CATEGORY = case_when(
      AGE == 99 ~ NA_real_,
      (AGE >= 18 & AGE <= 29) ~ 1,
      (AGE >= 30 & AGE <= 49) ~ 2,
      (AGE >= 50 & AGE <= 64) ~ 3,
      AGE >= 65 ~ 4),
    # Political ideology: 1 = left (0-2), 3 = right (4-6), 2 = everything else
    # that was answered (i.e. 'center').
    # `%in%` is never NA, so a missing answer must be caught explicitly;
    # otherwise it would fall through every branch and be coded as 'center'.
    POLITICAL_ID = case_when(
      is.na(POLITICAL_SCALE2) | POLITICAL_SCALE2 %in% c(8, 9) ~ NA_real_,
      POLITICAL_SCALE2 %in% c(0, 1, 2) ~ 1,
      POLITICAL_SCALE2 %in% c(4, 5, 6) ~ 3,
      TRUE ~ 2)) %>%
  select(
    COUNTRY,
    CLIMATE_CONFIDENCE, # Dependent variable
    SATISFIED_DEMOCRACY, DIVERSITY_GOOD, # Independent variables
    CLIMATE_CONCERN, FEMALE, AGE_CATEGORY, POLITICAL_ID, EDUCATION, INCOME) # Controls


# Export the cleaned dataset
write.csv(Global.df_FINAL, file = "./Dataset/Global Attitudes Survey_CLEANED.csv")

c. Combine the two data sets

# Stack the cleaned Global and US survey data sets on top of each other
COMBINED_df <- rbind(Global.df_FINAL, US.df_FINAL)

# Save the combined data set to disk
write.csv(
  COMBINED_df,
  file = "./Dataset/Climate Attitudes Survey_FINAL.csv"
)

# One data frame per country, named by country
# (so that a separate regression model can be fitted to each)
national_sample <- split(COMBINED_df, f = COMBINED_df[["COUNTRY"]])

III. Data analysis

2) Create 2 contingency tables for each country

# one for attitudes toward diversity (X1) and the other for satisfaction with democracy (X2)
# For examining the distribution of 0s and 1s of Y, across varying levels of X


variables <- c("SATISFIED_DEMOCRACY", "DIVERSITY_GOOD")

# Cross-tabulate CLIMATE_CONFIDENCE (Y) against one predictor (X).
#
# country: name of the national sample to use (an element of `national_sample`).
# X:       name of the predictor column, as a string.
# data:    data frame to tabulate; defaults to that country's sample.
# Returns the contingency table produced by `xtabs()`.
#
# The formula is built from the column name itself, so the table's second
# dimension is labelled with the variable name instead of the literal text
# 'get(X)'.
generate_cross_tabs <- function(country, X, data = national_sample[[country]]) {
  xtabs(reformulate(c("CLIMATE_CONFIDENCE", X)), data = data)
}


# Print, for every country, the two contingency tables of Y against X1 and X2.
# The helper is called directly: mapping over a single country name would only
# wrap each table in a one-element list and clutter the printed output.
for (country in countries) {
  cat("Country:", country, "\n")

  # For attitudes toward diversity (X1):
  cat("For DIVERSITY_GOOD:\n")
  print(generate_cross_tabs(country, "DIVERSITY_GOOD"))

  # For satisfaction with democracy (X2):
  cat("For SATISFIED_DEMOCRACY:\n")
  print(generate_cross_tabs(country, "SATISFIED_DEMOCRACY"))
}

3) Fit a multiple logistic regression model

# Model formula of the logistic regression:
# CLIMATE_CONFIDENCE (Y) on the two predictors of interest plus the controls.
# The order of the terms fixes the order of the rows in the regression table.
logit_equation <- reformulate(
  termlabels = c(
    # X variables
    "DIVERSITY_GOOD", "SATISFIED_DEMOCRACY",
    # Control variables
    "CLIMATE_CONCERN", "FEMALE", "AGE_CATEGORY",
    "EDUCATION", "POLITICAL_ID", "INCOME"
  ),
  response = "CLIMATE_CONFIDENCE"
)




# For each country, fit the model and store it under the country's name.
#
# The country name is spliced into the call with bquote(), so every fitted
# model records `data = national_sample[["<country>"]]`. Tools that look the
# data up again from the stored call (ggeffects does so through insight) then
# find the right national sample. A shared loop variable such as `data` would
# point at the last country's sample for every model once the loop is done,
# and would also shadow utils::data().
models <- list()

for (country in countries) {
  models[[country]] <- eval(bquote(
    glm(
      logit_equation,
      data = national_sample[[.(country)]],
      family = binomial("logit")
    )
  ))
}

4) Visualize the results of the regression analysis

# Print a plain-text regression table with one column per country model;
# the column headers are the upper-cased country names.

regression_table <- stargazer(
  models,
  type = "text",
  align = TRUE,
  column.labels = toupper(countries),
  title = "The effect of diversity and democracy perceptions on perceived efficacy of multilateral climate action"
)

The effect of diversity and democracy perceptions on perceived efficacy of multilateral climate action
====================================================================================
                                          Dependent variable:                       
                    ----------------------------------------------------------------
                                           CLIMATE_CONFIDENCE                       
                    AUSTRALIA SOUTH KOREA  GERMANY  NETHERLANDS  SWEDEN       US    
                       (1)        (2)        (3)        (4)        (5)       (6)    
------------------------------------------------------------------------------------
DIVERSITY_GOOD        0.344     0.416**    0.463**   0.424***   0.560***   0.568*** 
                     (0.246)    (0.163)    (0.186)    (0.162)    (0.198)   (0.188)  
                                                                                    
SATISFIED_DEMOCRACY -0.288***   -0.184*   -0.393***  -0.655***  -0.438*** -0.720*** 
                     (0.090)    (0.097)    (0.079)    (0.087)    (0.098)   (0.064)  
                                                                                    
CLIMATE_CONCERN     -0.410***  -0.472***   -0.082    -0.172**    -0.128   -0.795*** 
                     (0.083)    (0.107)    (0.079)    (0.084)    (0.081)   (0.059)  
                                                                                    
FEMALE               0.308**    0.305*      0.181      0.097     -0.141    0.288*** 
                     (0.151)    (0.182)    (0.150)    (0.151)    (0.144)   (0.099)  
                                                                                    
AGE_CATEGORY         -0.021      0.113    0.230***    0.123*     -0.086     -0.056  
                     (0.076)    (0.091)    (0.074)    (0.071)    (0.072)   (0.050)  
                                                                                    
EDUCATION           -0.355**     0.156     -0.184    -0.625***   -0.134     -0.095  
                     (0.162)    (0.169)    (0.163)    (0.163)    (0.157)   (0.108)  
                                                                                    
POLITICAL_ID         -0.074     -0.083      0.128     0.170*      0.081   -0.346*** 
                     (0.110)    (0.117)    (0.104)    (0.098)    (0.095)   (0.073)  
                                                                                    
INCOME                0.059      0.055    -0.406***   -0.050    -0.481***   -0.047  
                     (0.166)    (0.173)    (0.155)    (0.165)    (0.151)   (0.107)  
                                                                                    
Constant             1.236**   1.588***     0.225    1.483***     0.611    3.770*** 
                     (0.511)    (0.464)    (0.432)    (0.441)    (0.449)   (0.335)  
                                                                                    
------------------------------------------------------------------------------------
Observations           782        807        834        860        960      2,382   
Log Likelihood      -512.243   -472.542   -541.840   -538.488   -605.132  -1,273.024
Akaike Inf. Crit.   1,042.486   963.084   1,101.679  1,094.975  1,228.264 2,564.047 
====================================================================================
Note:                                                    *p<0.1; **p<0.05; ***p<0.01
# Dot-and-whisker plot of the coefficients (log odds) of every country model


# NOTE(review): `labels = countries` relabels the legend by position, so it
# assumes the plotted model order equals the order of `countries` -- confirm.
relabel_predictors(
  dwplot(models),
  c(
    DIVERSITY_GOOD = "DIVERSITY IS \n BETTER",
    SATISFIED_DEMOCRACY = "SATISFACTION \n WITH DEMOCRACY \n AT HOME",
    CLIMATE_CONCERN = "CONCERNED ABOUT \n CLIMATE CHANGE",
    FEMALE = "FEMALE",
    AGE_CATEGORY = "AGE",
    EDUCATION = "EDUCATION",
    POLITICAL_ID = "POLITICAL \n IDEOLOGY",
    INCOME = "INCOME"
  )
) +
  theme_gray() +
  # Dashed reference line at a coefficient of zero
  geom_vline(xintercept = 0, linetype = 2) +
  scale_color_discrete(
    name = "Countries",
    labels = countries
  ) +
  labs(
    x = "Coefficient Estimates \n (in log odds)",
    title = "Who tends to be more optimistic of \n tackling climate action at the international level?",
    subtitle = "Positive attitudes toward social diversity and low satisfaction with the functioning of democracy
    within one's country are good predictors of one's confidence in multilateral solutions",
    caption = "Source: The Pew Research Center. \n Note: Bars represent 95% Confidence Intervals."
  ) +
  theme(
    text = element_text(size = 10.5),
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 1, size = 9)
  )

5) Calculate the predicted probabilities

# For each country, compute the predicted probability that an individual would be
# optimistic of the impacts of global climate action, across different levels of
# a) diversity perception and b) satisfaction with democracy

# Hold other (independent and control) variables constant at the median
# In order to draw comparisons across multiple countries,
# set the reference point to the global median (i.e. the median of all 6 countries),
# NOT the national median



# Named vector of reference values at which the other variables are held
# when predicting; it is passed to ggpredict() as `condition`.
# Per the analysis design these are the *global* medians (all 6 countries).
median_values <- c(
  # Predictors of interest
  SATISFIED_DEMOCRACY = 2, DIVERSITY_GOOD = 1,
  # Controls
  CLIMATE_CONCERN = 2, FEMALE = 0, AGE_CATEGORY = 3,
  POLITICAL_ID = 2, EDUCATION = 0, INCOME = 1
)


# Compute the predicted probabilities for a given model and term.
#
# model: a fitted model accepted by ggpredict().
# X_var: optional term(s) to predict over, forwarded to ggpredict() as
#        `terms`. With the default NULL, `terms = NULL` is passed, exactly as
#        before; the code further below then picks individual predictors out of
#        the result by name (e.g. `$DIVERSITY_GOOD`).
# All remaining variables are held at the values in `median_values`.
generate_predicted_probs <- function(model, X_var = NULL) {
  ggpredict(model, terms = X_var, condition = median_values, pretty = TRUE)
}

6) Plot the predicted probabilities

# Predicted probabilities for every country model, keyed by country name
predicted_probs_list <- lapply(models, generate_predicted_probs)



# Predicted probabilities for diversity perceptions


# 1. Stack the per-country predictions for DIVERSITY_GOOD into one data frame
#    (the country name goes into the 'Country' column).
# 2. Plot the predicted probabilities with their confidence intervals.
# Note: the object stored (and printed, because of the outer parentheses) is
# the finished plot, not the intermediate table.
(predicted_probs_DIVERSITY <-
  predicted_probs_list %>%
  map_dfr(~ .x$DIVERSITY_GOOD, .id = "Country") %>%
  # Give a descriptive label to the 'x' column, which indicates different levels of social diversity beliefs:
  rename(DIVERSITY_GOOD = x) %>%
  ggplot(aes(
    x = factor(DIVERSITY_GOOD),
    y = predicted,
    color = Country)) +
  geom_point() +
  geom_errorbar(aes(
    ymax = conf.high, ymin = conf.low),
    alpha = 0.25,
    width = 0.5,
    color = "black") +
  scale_y_continuous(
    limits = c(0, 1),
    n.breaks = 6) +
  # 'Country' already holds the country names, so the scale needs no custom
  # breaks or labels. Numeric breaks (0:5) match none of those names and
  # would leave the legend empty.
  scale_color_discrete(name = "Country") +
  labs(
    title = "The predicted probabilities of the level of confidence \n in our ability to tackle global climate change via multilateralism", 
    x = "Diversity makes the world a better place", 
    y = "Predicted probability", 
    subtitle = "A cross-country comparison along diversity perceptions", 
    caption = "Source: The Pew Research Center. \n Note: Bars represent 95% Confidence Intervals.
       The graph shows the predicted probabilities while holding all other values constant at their median.") +
  theme(
    plot.title =  element_text(hjust = 0.5, face = "bold"), 
    plot.subtitle = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 1, size = 9),
    text = element_text(size = 10.5)) +
  facet_wrap(~ Country, nrow = 1))

# Predicted probabilities for democracy perceptions 



# 1. Stack the per-country predictions for SATISFIED_DEMOCRACY into one data
#    frame (the country name goes into the 'Country' column).
# 2. Plot the predicted probabilities with their confidence intervals.
# Note: the object stored (and printed, because of the outer parentheses) is
# the finished plot, not the intermediate table.
(predicted_probs_DEMOCRACY <-
  predicted_probs_list %>%
  map_dfr(~ .x$SATISFIED_DEMOCRACY, .id = "Country") %>%
  # Give a descriptive label to the 'x' column, which refers to different levels of satisfaction with democracy:
  rename(SATISFIED_DEMOCRACY = x) %>%
  ggplot(aes(
    x = SATISFIED_DEMOCRACY,
    y = predicted,
    linetype = Country)) +
  geom_line() +
  geom_ribbon(aes(
    ymax = conf.high, ymin = conf.low),
    alpha = 0.25) +
  scale_y_continuous(
    limits = c(0, 1),
    n.breaks = 6) +
  # 'Country' already holds the country names, so the scale needs no custom
  # breaks or labels. Numeric breaks (0:5) match none of those names and
  # would leave the legend empty.
  scale_linetype_discrete(name = "Country") +
  labs(
    title = "The predicted probabilities of the level of confidence \n in our ability to tackle global climate change via multilateralism", 
    x = "Satisfaction with the way democracy functions at home", 
    y = "Predicted Probability", 
    subtitle = "A cross-country comparison along democracy perceptions", 
    # The intervals are drawn as ribbons here, so the note says 'bands'
    caption = "Source: The Pew Research Center. \n Note: Shaded bands represent 95% Confidence Intervals. \n The graph shows the predicted probabilities while holding all other values constant at their median.") +
  theme(
    plot.title =  element_text(hjust = 0.5, face = "bold"), 
    plot.subtitle = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 1, size = 9),
    text = element_text(size = 10.5)) +
  facet_wrap(~ Country, nrow = 1))